{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. 创建数据\n",
    "# 2. 计算相似度\n",
    "# 3. 进行推荐"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.metrics import jaccard_score\n",
    "from sklearn.metrics.pairwise import pairwise_distances"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 算法思想：物以类聚，人以群分"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基于用户的协同过滤"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. 创建数据\n",
    "users = [\"User1\", \"User2\", \"User3\", \"User4\", \"User5\"]\n",
    "items = [\"Item A\", \"Item B\", \"Item C\", \"Item D\", \"Item E\"]\n",
    "# 用户购买记录数据集\n",
    "datasets = [\n",
    "    [1,0,1,1,0],\n",
    "    [1,0,0,1,1],\n",
    "    [1,0,1,0,0],\n",
    "    [0,1,0,1,1],\n",
    "    [1,1,1,0,1],\n",
    "]\n",
    "\n",
    "# 1.1 将数据转换为DataFrame\n",
    "df = pd.DataFrame(datasets, index=users, columns=items)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Item A  Item B  Item C  Item D  Item E\n",
       "User1       1       0       1       1       0\n",
       "User2       1       0       0       1       1\n",
       "User3       1       0       1       0       0\n",
       "User4       0       1       0       1       1\n",
       "User5       1       1       1       0       1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 测试物品A与物品B的杰卡德相关系数\n",
    "jaccard_score(df['Item A'], df['Item B'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "f:\\virtual_environment\\ai\\lib\\site-packages\\sklearn\\metrics\\pairwise.py:1765: DataConversionWarning: Data was converted to boolean for metric jaccard\n",
      "  warnings.warn(msg, DataConversionWarning)\n"
     ]
    }
   ],
   "source": [
    "# 2 计算相似度，现在pairwise_distances第一个参数得是ndarray类型，计算的是横行的距离，\n",
    "# 1 - 距离就是杰卡德相关系数\n",
    "user_similary = 1 - pairwise_distances(np.array(datasets), metric='jaccard')\n",
    "user_similary = pd.DataFrame(user_similary, index=users, columns=users)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>User1</th>\n",
       "      <th>User2</th>\n",
       "      <th>User3</th>\n",
       "      <th>User4</th>\n",
       "      <th>User5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>0.500000</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.25</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>0.400000</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.4</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          User1  User2     User3  User4  User5\n",
       "User1  1.000000   0.50  0.666667    0.2    0.4\n",
       "User2  0.500000   1.00  0.250000    0.5    0.4\n",
       "User3  0.666667   0.25  1.000000    0.0    0.5\n",
       "User4  0.200000   0.50  0.000000    1.0    0.4\n",
       "User5  0.400000   0.40  0.500000    0.4    1.0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_similary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2.1 提取和当前用户最相关的两个用户\n",
    "top_users = {}\n",
    "for user in user_similary.index:\n",
    "    # 遍历用户相似表的数据，删除每行数据中相似度为1 的数据，然后对其他数据进行排序，提取前两位用户\n",
    "    top2_user = list(user_similary[user].drop(user).sort_values(ascending=False).index)[:2]\n",
    "    top_users[user] = top2_user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# user_similary['User1'].drop('User1').sort_values(ascending=False)  # Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'User1': ['User3', 'User2'],\n",
       " 'User2': ['User4', 'User1'],\n",
       " 'User3': ['User1', 'User5'],\n",
       " 'User4': ['User2', 'User5'],\n",
       " 'User5': ['User3', 'User4']}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2.2 根据相关用户进行数据过滤，然后创建推荐数据集\n",
    "rs_results = {}\n",
    "for user, re_users in top_users.items():\n",
    "    rs_result = set()\n",
    "    for re_user in re_users:\n",
    "        # 取相关用户购买物品的并集\n",
    "        # 集合对象.union()返回的是一个对象\n",
    "        rs_result = rs_result.union(set(df.loc[re_user, :].replace(0, np.nan).dropna().index))\n",
    "    # 从相关用户购买商品的并集中删除当前用户购买的种类\n",
    "    rs_result -= set(df.loc[user].replace(0, np.nan).dropna().index)\n",
    "    # 将给当前用户推荐的结果加入推荐字典中\n",
    "    rs_results[user] = list(rs_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'User1': ['Item E'],\n",
       " 'User2': ['Item B', 'Item C'],\n",
       " 'User3': ['Item B', 'Item E', 'Item D'],\n",
       " 'User4': ['Item C', 'Item A'],\n",
       " 'User5': ['Item D']}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3 进行推荐\n",
    "rs_results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基于物品的协同过滤"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Item A  Item B  Item C  Item D  Item E\n",
       "User1       1       0       1       1       0\n",
       "User2       1       0       0       1       1\n",
       "User3       1       0       1       0       0\n",
       "User4       0       1       0       1       1\n",
       "User5       1       1       1       0       1"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1. 获取数据\n",
    "users = [\"User1\", \"User2\", \"User3\", \"User4\", \"User5\"]\n",
    "items = [\"Item A\", \"Item B\", \"Item C\", \"Item D\", \"Item E\"]\n",
    "# 用户购买记录数据集\n",
    "datasets = [\n",
    "    [1,0,1,1,0],\n",
    "    [1,0,0,1,1],\n",
    "    [1,0,1,0,0],\n",
    "    [0,1,0,1,1],\n",
    "    [1,1,1,0,1],\n",
    "]\n",
    "\n",
    "df = pd.DataFrame(datasets, index=users, columns=items)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "f:\\virtual_environment\\ai\\lib\\site-packages\\sklearn\\metrics\\pairwise.py:1765: DataConversionWarning: Data was converted to boolean for metric jaccard\n",
      "  warnings.warn(msg, DataConversionWarning)\n"
     ]
    }
   ],
   "source": [
    "# 2. 计算相似度\n",
    "items_similary =  1 - pairwise_distances(np.array(datasets).T, metric='jaccard')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "items_similary = pd.DataFrame(items_similary, index=items, columns=items)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Item A</th>\n",
       "      <td>1.00</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.75</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item B</th>\n",
       "      <td>0.20</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item C</th>\n",
       "      <td>0.75</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item D</th>\n",
       "      <td>0.40</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.20</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item E</th>\n",
       "      <td>0.40</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.50</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Item A    Item B  Item C  Item D    Item E\n",
       "Item A    1.00  0.200000    0.75    0.40  0.400000\n",
       "Item B    0.20  1.000000    0.25    0.25  0.666667\n",
       "Item C    0.75  0.250000    1.00    0.20  0.200000\n",
       "Item D    0.40  0.250000    0.20    1.00  0.500000\n",
       "Item E    0.40  0.666667    0.20    0.50  1.000000"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "items_similary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2.1 创建与当前物品相似的字典数据\n",
    "sim_items_dict = {}\n",
    "for item in items_similary.index:\n",
    "    # 将与当前物品最相似的两个物品加入字典中\n",
    "    sim_items = list(items_similary.loc[item].drop(item).sort_values(ascending=False).index)[:2]\n",
    "    sim_items_dict[item] = sim_items\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Item A': ['Item C', 'Item E'],\n",
       " 'Item B': ['Item E', 'Item D'],\n",
       " 'Item C': ['Item A', 'Item B'],\n",
       " 'Item D': ['Item E', 'Item A'],\n",
       " 'Item E': ['Item B', 'Item D']}"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sim_items_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2.2 创建推荐字典\n",
    "rs_results = {}\n",
    "for user in df.index:\n",
    "    # 获取给该用户的推荐结果\n",
    "    rs_result = set()\n",
    "    for item in df.columns:\n",
    "        if df[item][user] == 1:\n",
    "            rs_result = rs_result.union(set(sim_items_dict[item]))\n",
    "    rs_result -= set(df.loc[user].replace(0, np.nan).dropna().index)\n",
    "    rs_results[user] = rs_result\n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'User1': {'Item B', 'Item E'},\n",
       " 'User2': {'Item B', 'Item C'},\n",
       " 'User3': {'Item B', 'Item E'},\n",
       " 'User4': {'Item A'},\n",
       " 'User5': {'Item D'}}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3. 进行推荐\n",
    "rs_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = set((1, 2 , 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "set"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "a -= {2}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1, 3}"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 2, 3, 4)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.randn(1, 2, 3, 4).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.35710746,  1.75740954,  0.10262824, -0.00969784, -0.08732885],\n",
       "       [ 0.05048547, -1.10409446,  2.1457781 , -0.04299009, -1.05978908],\n",
       "       [ 0.72717314, -0.39131396,  0.50104865,  1.38049512,  0.89127942],\n",
       "       [-1.00386353, -1.16882914, -1.07221505, -0.97611064, -0.08634205]])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.normal(0, 1, size=(4, 5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.36744247, -0.27419059]])"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.standard_normal(size=(1, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000,)"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.uniform(0, 1, 10000).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.random.randint(1, 2, size=(4, 5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 1, 1, 1],\n",
       "       [1, 1, 1, 1],\n",
       "       [1, 1, 1, 1],\n",
       "       [1, 1, 1, 1],\n",
       "       [1, 1, 1, 1]])"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.reshape((5, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "b = pd.DataFrame(np.random.randn(4, 5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.144132</td>\n",
       "      <td>3.166207</td>\n",
       "      <td>0.453888</td>\n",
       "      <td>-0.120201</td>\n",
       "      <td>-0.612896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.003320</td>\n",
       "      <td>1.248735</td>\n",
       "      <td>-0.690009</td>\n",
       "      <td>0.005598</td>\n",
       "      <td>-1.576791</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.125238</td>\n",
       "      <td>-0.397211</td>\n",
       "      <td>-0.054644</td>\n",
       "      <td>-0.764264</td>\n",
       "      <td>-0.387677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.989066</td>\n",
       "      <td>0.077090</td>\n",
       "      <td>-1.570408</td>\n",
       "      <td>-0.004322</td>\n",
       "      <td>-0.660991</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4\n",
       "0  0.144132  3.166207  0.453888 -0.120201 -0.612896\n",
       "1  1.003320  1.248735 -0.690009  0.005598 -1.576791\n",
       "2  0.125238 -0.397211 -0.054644 -0.764264 -0.387677\n",
       "3 -0.989066  0.077090 -1.570408 -0.004322 -0.660991"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.144132</td>\n",
       "      <td>3.166207</td>\n",
       "      <td>0.453888</td>\n",
       "      <td>-0.120201</td>\n",
       "      <td>-0.612896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.003320</td>\n",
       "      <td>1.248735</td>\n",
       "      <td>-0.690009</td>\n",
       "      <td>0.005598</td>\n",
       "      <td>-1.576791</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4\n",
       "0  0.144132  3.166207  0.453888 -0.120201 -0.612896\n",
       "1  1.003320  1.248735 -0.690009  0.005598 -1.576791"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   -0.612896\n",
       "1   -1.576791\n",
       "2   -0.387677\n",
       "3   -0.660991\n",
       "Name: 4, dtype: float64"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b[4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.6128958947046561"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.loc[0, 4] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.6128958947046561"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# b.ix[0, 4]  # 无\n",
    "b.at[0, 4]  # 有"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.6128958947046561"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.iat[0, 4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.144132</td>\n",
       "      <td>3.166207</td>\n",
       "      <td>0.453888</td>\n",
       "      <td>-0.120201</td>\n",
       "      <td>-0.612896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.003320</td>\n",
       "      <td>1.248735</td>\n",
       "      <td>-0.690009</td>\n",
       "      <td>0.005598</td>\n",
       "      <td>-1.576791</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.125238</td>\n",
       "      <td>-0.397211</td>\n",
       "      <td>-0.054644</td>\n",
       "      <td>-0.764264</td>\n",
       "      <td>-0.387677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.989066</td>\n",
       "      <td>0.077090</td>\n",
       "      <td>-1.570408</td>\n",
       "      <td>-0.004322</td>\n",
       "      <td>-0.660991</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4\n",
       "0  0.144132  3.166207  0.453888 -0.120201 -0.612896\n",
       "1  1.003320  1.248735 -0.690009  0.005598 -1.576791\n",
       "2  0.125238 -0.397211 -0.054644 -0.764264 -0.387677\n",
       "3 -0.989066  0.077090 -1.570408 -0.004322 -0.660991"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0.606226\n",
       "1   -0.001829\n",
       "2   -0.295712\n",
       "3   -0.629539\n",
       "dtype: float64"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.mean(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    NaN\n",
       "1    NaN\n",
       "2    1.0\n",
       "3    2.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series([1, 2, 3, 4]).shift(2)\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.874762</td>\n",
       "      <td>-1.397211</td>\n",
       "      <td>-1.054644</td>\n",
       "      <td>-1.764264</td>\n",
       "      <td>-1.387677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-2.989066</td>\n",
       "      <td>-1.922910</td>\n",
       "      <td>-3.570408</td>\n",
       "      <td>-2.004322</td>\n",
       "      <td>-2.660991</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4\n",
       "0       NaN       NaN       NaN       NaN       NaN\n",
       "1       NaN       NaN       NaN       NaN       NaN\n",
       "2 -0.874762 -1.397211 -1.054644 -1.764264 -1.387677\n",
       "3 -2.989066 -1.922910 -3.570408 -2.004322 -2.660991"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.sub(s, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.144132</td>\n",
       "      <td>3.166207</td>\n",
       "      <td>0.453888</td>\n",
       "      <td>-0.120201</td>\n",
       "      <td>-0.612896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.003320</td>\n",
       "      <td>1.248735</td>\n",
       "      <td>-0.690009</td>\n",
       "      <td>0.005598</td>\n",
       "      <td>-1.576791</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.125238</td>\n",
       "      <td>-0.397211</td>\n",
       "      <td>-0.054644</td>\n",
       "      <td>-0.764264</td>\n",
       "      <td>-0.387677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.989066</td>\n",
       "      <td>0.077090</td>\n",
       "      <td>-1.570408</td>\n",
       "      <td>-0.004322</td>\n",
       "      <td>-0.660991</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4\n",
       "0  0.144132  3.166207  0.453888 -0.120201 -0.612896\n",
       "1  1.003320  1.248735 -0.690009  0.005598 -1.576791\n",
       "2  0.125238 -0.397211 -0.054644 -0.764264 -0.387677\n",
       "3 -0.989066  0.077090 -1.570408 -0.004322 -0.660991"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "b[4] = np.where(b[3] > 0, 1, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.144132</td>\n",
       "      <td>3.166207</td>\n",
       "      <td>0.453888</td>\n",
       "      <td>-0.120201</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.003320</td>\n",
       "      <td>1.248735</td>\n",
       "      <td>-0.690009</td>\n",
       "      <td>0.005598</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.125238</td>\n",
       "      <td>-0.397211</td>\n",
       "      <td>-0.054644</td>\n",
       "      <td>-0.764264</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.989066</td>\n",
       "      <td>0.077090</td>\n",
       "      <td>-1.570408</td>\n",
       "      <td>-0.004322</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3  4\n",
       "0  0.144132  3.166207  0.453888 -0.120201  0\n",
       "1  1.003320  1.248735 -0.690009  0.005598  1\n",
       "2  0.125238 -0.397211 -0.054644 -0.764264  0\n",
       "3 -0.989066  0.077090 -1.570408 -0.004322  0"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "c = b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 0, 1])"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.where(c[1] > 0, 1, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.144132</td>\n",
       "      <td>3.166207</td>\n",
       "      <td>0.453888</td>\n",
       "      <td>-0.120201</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.003320</td>\n",
       "      <td>1.248735</td>\n",
       "      <td>-0.690009</td>\n",
       "      <td>0.005598</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.125238</td>\n",
       "      <td>-0.397211</td>\n",
       "      <td>-0.054644</td>\n",
       "      <td>-0.764264</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.989066</td>\n",
       "      <td>0.077090</td>\n",
       "      <td>-1.570408</td>\n",
       "      <td>-0.004322</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3  4\n",
       "0  0.144132  3.166207  0.453888 -0.120201  0\n",
       "1  1.003320  1.248735 -0.690009  0.005598  1\n",
       "2  0.125238 -0.397211 -0.054644 -0.764264  0\n",
       "3 -0.989066  0.077090 -1.570408 -0.004322  0"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
